c*******************************************************************
c   oned.f - a solution to the Poisson problem using Jacobi 
c   interation on a 1-d decomposition
c     
c   The size of the domain is read by processor 0 and broadcast to
c   all other processors.  The Jacobi iteration is run until the 
c   change in successive elements is small or a maximum number of 
c   iterations is reached.  The difference is printed out at each 
c   step.
c*******************************************************************
      program main 
C
      include "mpif.h"
      integer maxn
      parameter (maxn = 128)
      double precision  a(maxn,maxn), b(maxn,maxn), f(maxn,maxn)
      save a
      integer nx, ny
      integer myid, numprocs, ierr
      integer comm1d, nbrbottom, nbrtop, s, e, it
      integer sizedouble, win, winb
      integer *8 winsize
      double precision diff, diffnorm, dwork
      double precision t1, t2
      external diff

      call MPI_INIT( ierr )
      call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
      call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
c
      if (myid .eq. 0) then
c
c         Get the size of the problem
c
c          print *, 'Enter nx'
c          read *, nx
           nx = 110
      endif
      call MPI_BCAST(nx,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
      ny   = nx
c
c Get a new communicator for a decomposition of the domain
c
      call MPI_CART_CREATE( MPI_COMM_WORLD, 1, numprocs, .false., 
     $                    .true., comm1d, ierr )
c
c Get my position in this communicator, and my neighbors
c 
      call MPI_COMM_RANK( comm1d, myid, ierr )
      call MPI_Cart_shift( comm1d, 0,  1, nbrbottom, nbrtop, ierr   )
c
c Compute the actual decomposition
c     
      call MPE_DECOMP1D( ny, numprocs, myid, s, e )
c
c Initialize the right-hand-side (f) and the initial solution guess (a)
c
      call onedinit( a, b, f, nx, s, e )
c
c Create the RMA window
      call mpi_type_size( MPI_DOUBLE_PRECISION, sizedouble, ierr)
      winsize = (nx+2)*(e-s+3)*sizedouble
      call mpi_win_create( a, winsize, sizedouble, 
     *     MPI_INFO_NULL, MPI_COMM_WORLD, win, ierr )
      call mpi_win_create( b, winsize, sizedouble, 
     *     MPI_INFO_NULL, MPI_COMM_WORLD, winb, ierr )
c
c
c Actually do the computation.  Note the use of a collective operation to
c check for convergence, and a do-loop to bound the number of iterations.
c
      call MPI_BARRIER( MPI_COMM_WORLD, ierr )
      t1 = MPI_WTIME()
      do 10 it=1, 100
c      do 10 it=1, 2
	call exchng1( a, nx, s, e, win, nbrbottom, nbrtop )
	call sweep1d( a, f, nx, s, e, b )
	call exchng1( b, nx, s, e, winb, nbrbottom, nbrtop )
	call sweep1d( b, f, nx, s, e, a )
	dwork = diff( a, b, nx, s, e )
	call MPI_Allreduce( dwork, diffnorm, 1, MPI_DOUBLE_PRECISION, 
     $                      MPI_SUM, comm1d, ierr )
        if (diffnorm .lt. 1.0e-5) goto 20
        if (myid .eq. 0) print *, 2*it, ' Difference is ', diffnorm
10     continue
      if (myid .eq. 0) print *, 'Failed to converge'
20    continue
      t2 = MPI_WTIME()
      if (myid .eq. 0) then
         print *, 'Converged after ', 2*it, ' Iterations in ', t2 - t1,
     $        ' secs '
      endif
c
      call MPI_WIN_FREE( win, ierr )
      call MPI_WIN_FREE( winb, ierr )
      call MPI_FINALIZE(ierr)
      end


